1 Learning Outcomes

2 The usmap package

3 Get and Clean COVID Data

3.1 Get Data

  • Updated Daily

     url_in <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"
    df <- tibble(file_names = c("time_series_covid19_confirmed_US.csv",
                        "time_series_covid19_deaths_US.csv"))
    df %>% 
      mutate(url = str_c(url_in, file_names, sep =""),
             data = map(url, ~read_csv(., col_types = cols(FIPS = col_factor()), na = "")),
             case_type = as.factor(str_extract(file_names,"[:alpha:]*_[gU][:alpha:]*"))) %>%
      select(case_type, data) ->
      df
    
     df$vars <- map(df$data,  names)

3.2 Clean Data

  • Fix names and create consistent variables

     #create a helper function
        fix_names <- function(df, pattern, repl){
         stopifnot(is.data.frame(df), is.character(pattern), is.character(repl))
          names(df) <- str_replace_all(names(df), pattern, repl)
          return(df)
        }
    df %>% 
      mutate(data = map(data, ~ fix_names(., "Admin2", "County")),
             data = map(data, ~ fix_names(., "Long_", "Long")),
             data = map_if(data, !str_detect(df$case_type,"deaths_US"),
                           ~ mutate(., Population = 0)),
             data = map(data, ~filter(., !is.na(FIPS))),
             data = map(data, ~ mutate(., cFIPS = str_extract(UID,".{5}$"),
                                       sFIPS = str_extract(cFIPS, "^..")))
             )->df
    
    df$vars <- map(df$data,  names)

3.3 Tidy Data

  • Reshape and Fix Date Class

    df %>% 
      mutate(data = map(data, ~ pivot_longer(data = ., cols = contains("/"), 
                                             names_to = "Date", 
                                             values_to = "Daily_Total") ))-> 
    df
    
    #helper function for Date Change
    fix_date_mdy <- function(df,var_name){
      stopifnot(is.data.frame(df), is.character(var_name))
      df[[var_name]]<- mdy(df[[var_name]])
      return(df)
    }
    
    df %>%
      mutate(data = map(data, ~fix_date_mdy(., "Date")))-> 
    df

3.4 Unnest into Data Frame

  • Save as new df and remove the old df and vars

       df %>% 
      unnest(cols=data) %>% 
      ungroup() ->
      df_all
       # rm(df)
    df_all <- select(df_all, -vars)

3.5 Add Variables

  • Fix Population and Daily Totals

    df_all %>% 
      group_by(UID) %>% 
      filter(!is.na(cFIPS), County !="Unassigned", !str_detect(County, "^Out of ")) %>% 
      mutate(Population = max(Population),
             Daily_Total_percap = Daily_Total/Population)->
      df_all
    #write_csv(df_all, "./data/covid_us_data.csv")

4 usmap Package

4.1 Create a map of the states

  • Empty map

    plot_usmap(regions = "states") + 
      labs(title = "US States",
           subtitle = "This is a blank map of the states of the United States.") + 
      theme(panel.background = element_rect(color = "black", fill = "lightblue"))

4.2 Create a map of the South Atlantic region

  • Predefined Census Regions

    plot_usmap(include = .south_atlantic, labels = TRUE) + 
      labs(title = "US South Atlantic States",
           subtitle = "This is a blank map of the states in the South Atlantic Census Region") + 
      theme(panel.background = element_rect(color = "black", fill = "lightblue"))

4.3 Create a map of the Counties

  • Empty Map

    plot_usmap(regions = "counties") + 
      labs(title = "US Counties",
           subtitle = "This is a blank map of the counties of the United States.") + 
      theme(panel.background = element_rect(color = "black", fill = "lightblue"))

4.4 Create a map of the Counties in Specific States

  • include= States by US abbreviations

    plot_usmap("counties", include = c("VA", "MD", "DC")) +
      labs(title = "Custom DMV Area",
           subtitle = "These are the counties in MD, DC< anD VA.")+ 
      theme(panel.background = element_rect(color = "blue", fill = "lightblue"))

4.5 Create a map of the Specific Counties

  • Select by FIPS (five digit code)

    dmv_county_fips <- c("11001","24003","24009","24017","24021","24027","24031",
                         "24033","51013","51059","51061","51107","51153","51179",
                         "51510","51600","51610","51630","51683","51685")
    plot_usmap("counties", include = dmv_county_fips) +
      labs(title = "Greater DMV Area",
           subtitle = "These are the Counties/Cities in the Greater DMV area.")+ 
      theme(panel.background = element_rect(color = "blue", fill = "lightblue"))

5 Add data to Maps

5.1 State Level Data

  • Current Totals Cases by State

    df_all %>% 
      filter(Date == max(Date), Population >0, !is.na(sFIPS)) %>% 
      group_by(sFIPS, case_type, Date) %>% 
      summarise(Current_Total = sum(Daily_Total),
                Current_Total_percap = sum(Daily_Total)/sum(Population))->
      state_totals
  • Add to US Maps

    state_totals %>% 
      filter(case_type =="confirmed_US") %>% 
      rename(fips = sFIPS)-> 
      state_cases
    plot_usmap(data = state_cases, values = "Current_Total", color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Confirmed Cases", label = scales::comma)+
      labs(title = "US States",
           subtitle = paste0("Total Cases by State as of ", 
                             max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "top")

    state_totals %>% 
      filter(case_type =="deaths_US") %>% 
      rename(fips = sFIPS)-> 
      state_cases
    plot_usmap(data = state_cases, values = "Current_Total", color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Total Deaths", label = scales::comma)+
      labs(title = "US States",
           subtitle = paste0("Total Deaths by State as of ", max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "top")
  • Per Capita Maps

    state_totals %>% 
      filter(case_type =="confirmed_US") %>% 
      rename(fips = sFIPS)-> 
      state_cases
    plot_usmap(data = state_cases, values = "Current_Total_percap", 
               color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Confirmed Cases per Capita", 
                            label = scales::comma)+
      labs(title = "US States",
           subtitle = paste0("Total Cases per Capita by State as of ",
                             max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "right")

    state_totals %>% 
      filter(case_type =="deaths_US") %>% 
      rename(fips = sFIPS)-> 
      state_cases
    plot_usmap(data = state_cases, values = "Current_Total_percap", 
               color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Total Deaths Per Capita", 
                            label = scales::comma)+
      labs(title = "US States",
           subtitle = paste0("Total Deaths per Capita by State as of ", 
                             ... = max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "right")
  • Using a log Scale

    state_totals %>% 
      filter(case_type =="deaths_US") %>% 
      mutate(Current_Total10 = log10(Current_Total),
             Current_Total_percap10 = log10(Current_Total_percap)) %>% 
      rename(fips = sFIPS)-> 
      state_cases
    plot_usmap(data = state_cases, values = "Current_Total_percap10", 
               color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Total Deaths per Capita (Log 10)", 
                            label = scales::comma)+
      labs(title = "US States",
           subtitle = paste0("Log(10) of Total Deaths per Capita by State as of ", 
                             ... = max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "right")

  • Per Area Maps

    state_cases %>% 
      left_join(states, by = c("fips"="STATE")) %>% 
      mutate(Current_Total_perarea = Current_Total/AREALAND)->
      state_casesa
    
    plot_usmap(data = state_casesa, values = "Current_Total_perarea", 
               color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Total Deaths per unit Area (Log 10)", 
                            label = scales::comma)+
      labs(title = "US States",
           subtitle = paste0("Total Deaths per unit area by State as of ", 
                             ... = max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "right")

5.2 County Level Data

  • Current Totals Cases by County

    df_all %>% 
      filter(Date == max(Date), Population >0, !is.na(cFIPS)) %>% 
      group_by(cFIPS, case_type, Date) %>% 
      summarise(Current_Total = sum(Daily_Total),
                Current_Total_percap = sum(Daily_Total)/sum(Population))->
      county_totals
  • Add to a County Map for New York and New Jersey

    county_totals %>% 
      filter(case_type =="confirmed_US") %>% 
      mutate(Current_Total_log2 = log2(Current_Total)) %>% 
      rename(fips = cFIPS)-> 
      county_cases
    plot_usmap(data = county_cases, include = c("NY","NJ"),
               values = "Current_Total", color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Confirmed Cases", label = scales::comma)+
      labs(title = "US States",
           subtitle = paste0("Total Cases by County as of ", 
                             max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "top")

  • Add to a County/City Map for DMV

    plot_usmap(data = county_cases, include = dmv_county_fips,
               values = "Current_Total", color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Confirmed Cases", label = scales::comma)+
      labs(title = "DMV Region",
           subtitle = paste0("Total Cases by County/City as of ", 
                             max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "top")

    county_totals %>% 
      filter(case_type =="deaths_US") %>% 
      rename(fips = cFIPS)-> 
      county_deaths
    plot_usmap(data = county_deaths, include = dmv_county_fips,
               values = "Current_Total", color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Confirmed Deaths", label = scales::comma)+
      labs(title = "DMV Region",
           subtitle = paste0("Total Deaths by County/City as of ", 
                             max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "top")

  • Per Capita Cases

    plot_usmap(data = county_cases, include = dmv_county_fips,
               values = "Current_Total_percap", color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Confirmed Cases Per Capita", label = scales::comma)+
      labs(title = "DMV Region",
           subtitle = paste0("Total Cases per Capita by County/City as of ", 
                             max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "top")
  • Per Capita Deaths in DMV

    county_totals %>% 
      filter(case_type =="deaths_US") %>% 
      rename(fips = cFIPS)-> 
      county_deaths
    plot_usmap(data = county_deaths, include = dmv_county_fips,
               values = "Current_Total_percap", color = "blue") + 
      scale_fill_continuous(low = "white", high = "red", 
                            name = "Confirmed Deaths per Capita", label = scales::comma)+
      labs(title = "DMV Region",
           subtitle = paste0("Total Deaths per Capita by County/City as of ", 
                             max(state_cases$Date))) + 
      theme(panel.background = element_rect(color = "black", fill = "white")) +
      theme(legend.position = "top")